R Markdown

In this document we are compiling all of our models for the class imbalance paradigm.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(patchwork)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(corrplot)
## corrplot 0.84 loaded
library(ggfortify)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(class) #knn
library(gmodels) # CrossTable()
library(caret) # createFolds()
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(caTools) #sample.split()
library(ROCR) # prediction(), performance()
library(MLeval)
library(MLmetrics)
## 
## Attaching package: 'MLmetrics'
## The following objects are masked from 'package:caret':
## 
##     MAE, RMSE
## The following object is masked from 'package:psych':
## 
##     AUC
## The following object is masked from 'package:base':
## 
##     Recall
library(sparseLDA)
library(kernlab)
## 
## Attaching package: 'kernlab'
## The following object is masked from 'package:psych':
## 
##     alpha
## The following object is masked from 'package:purrr':
## 
##     cross
## The following object is masked from 'package:ggplot2':
## 
##     alpha
library(stepPlr)
library(ROCit)
## 
## Attaching package: 'ROCit'
## The following object is masked from 'package:psych':
## 
##     logit
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact
set.seed(123)

Load Test/Train Datasets

set all targets to factor | Index all other columns for scaling

## Parsed with column specification:
## cols(
##   .default = col_double()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double()
## )
## See spec(...) for full column specifications.
## Warning: Unknown or uninitialised column: `targets`.

## Warning: Unknown or uninitialised column: `targets`.
## 
## The training DF is 17906 instances long
## The testing DF is 4474 instances long
# Balance classes by random undersampling: keep every fall (Target == 1)
# instance and draw an equally sized random subset of the no-fall
# (Target == 0) instances. Done independently for the train and test sets.
# (seq_len(nrow(.)) replaces seq(from=1, by=1, to=dim(.)[1]); identical
# result, safe for the zero-row edge case, and the idiomatic form.)
Falls_train_df <- train_df[train_df$Target == 1, ]
noFalls_train_df <- train_df[train_df$Target == 0, ]
inds <- seq_len(nrow(noFalls_train_df))
noFall_ind <- sample(inds, size = nrow(Falls_train_df))
noFalls_train_df <- noFalls_train_df[noFall_ind, ]
train_df <- rbind(noFalls_train_df, Falls_train_df)

Falls_test_df <- test_df[test_df$Target == 1, ]
noFalls_test_df <- test_df[test_df$Target == 0, ]
inds <- seq_len(nrow(noFalls_test_df))
noFall_ind <- sample(inds, size = nrow(Falls_test_df))
noFalls_test_df <- noFalls_test_df[noFall_ind, ]
test_df <- rbind(noFalls_test_df, Falls_test_df)

# caret with classProbs = TRUE needs syntactically valid factor levels, so
# relabel the training target: 1 -> "yes", 0 -> "no". The test target is
# deliberately left as 0/1; downstream evaluation (confusionMatrix,
# errorRate) compares against "0"/"1".
levels(train_df$Target)[levels(train_df$Target) == "1"] <- "yes"
levels(train_df$Target)[levels(train_df$Target) == "0"] <- "no"
# Summarise cross-validated training performance for a caret model.
#
# Builds per-fold lift data from the saved out-of-fold predictions,
# computes per-resample accuracy, and renders an MLeval report.
#
# Args:
#   model:        a caret::train object fitted with savePredictions = TRUE
#                 and classProbs = TRUE.
#   architecture: display name used as the group label in the MLeval plots.
#
# Returns: a data.frame with one row per resample (Resample, accuracy).
trainResults <- function(model, architecture){
  # NOTE(review): there is no column literally named "R" in model$pred, so
  # `model$pred$R` relies on data.frame partial matching ("Resample" is the
  # only column starting with "R") -- confirm which column was intended.
  for_lift <- data.frame(Class = model$pred$obs, rf = model$pred$R, resample = model$pred$Resample)
  lift_df <-  data.frame()
  for (fold in unique(for_lift$resample)) {
    fold_df <- dplyr::filter(for_lift, resample == fold)
    lift_obj_data <- lift(Class ~ rf, data = fold_df, class = "R")$data
    lift_obj_data$fold = fold
    lift_df = rbind(lift_df, lift_obj_data)
  }
  lift_obj <- lift(Class ~ rf, data = for_lift, class = "R")
  # Per-fold accuracy from the saved out-of-fold predictions. Namespace-
  # qualified to avoid the plyr/dplyr summarise masking attached at the top
  # of this document (plyr was loaded after dplyr).
  accuracy <- plyr::ddply(model$pred, "Resample", plyr::summarise,
        accuracy = Accuracy(pred, obs))
  # BUG FIX: previously called evalm(list(model_lr), ...), evaluating the
  # global LR model no matter which model was passed in -- e.g. the LDA
  # call reported LR's curves. Evaluate the argument instead.
  res <- evalm(list(model), gnames = c(architecture))
  return(accuracy)
}

# Misclassification rate: the fraction of predicted labels that disagree
# with the reference labels.
errorRate <- function(model, test){
  mean(model != test)
}

Build LR Here

# 5-fold repeated cross-validation. classProbs and savePredictions are
# required downstream by trainResults() / MLeval; parallel resampling is
# disabled for this model.
k = 5
myControl_lr <- trainControl(
                             method = "repeatedcv", number = k,
                             summaryFunction = twoClassSummary,
                             classProbs = TRUE,
                             verboseIter = FALSE,
                             savePredictions = TRUE,
                             allowParallel = FALSE
                            )
# Penalized logistic regression (stepPlr) grid: 100 log-spaced lambda
# values over 10^-3..10^3, with BIC held as the complexity criterion.
myGrid_lr <-  expand.grid(.lambda=10^seq(-3, 3, length = 100), 
                          .cp="bic")
# Fit on centered/scaled predictors; lambda is selected by CV ROC.
model_lr <- train(Target ~., 
                 data = train_df, 
                 method = "plr",
                 tuneGrid = myGrid_lr, 
                 metric = "ROC",
                 trControl = myControl_lr,
                 preProcess = c("center", "scale"))

model_lr
## Penalized Logistic Regression 
## 
## 662 samples
## 108 predictors
##   2 classes: 'no', 'yes' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (5 fold, repeated 1 times) 
## Summary of sample sizes: 530, 530, 529, 529, 530 
## Resampling results across tuning parameters:
## 
##   lambda        ROC        Sens  Spec     
##   1.000000e-03  0.9997704  1     0.9969697
##   1.149757e-03  0.9997704  1     0.9969697
##   1.321941e-03  0.9997704  1     0.9969697
##   1.519911e-03  0.9997704  1     0.9969697
##   1.747528e-03  0.9997704  1     0.9969697
##   2.009233e-03  0.9997704  1     0.9969697
##   2.310130e-03  0.9997704  1     0.9969697
##   2.656088e-03  0.9997704  1     0.9969697
##   3.053856e-03  0.9997704  1     0.9969697
##   3.511192e-03  0.9997704  1     0.9969697
##   4.037017e-03  0.9997704  1     0.9969697
##   4.641589e-03  0.9997704  1     0.9969697
##   5.336699e-03  0.9997704  1     0.9969697
##   6.135907e-03  0.9997704  1     0.9969697
##   7.054802e-03  0.9997704  1     0.9969697
##   8.111308e-03  0.9997704  1     0.9969697
##   9.326033e-03  0.9997704  1     0.9969697
##   1.072267e-02  0.9997704  1     0.9969697
##   1.232847e-02  0.9997704  1     0.9969697
##   1.417474e-02  0.9997704  1     0.9969697
##   1.629751e-02  0.9997704  1     0.9969697
##   1.873817e-02  0.9997704  1     0.9969697
##   2.154435e-02  0.9997704  1     0.9969697
##   2.477076e-02  0.9997704  1     0.9969697
##   2.848036e-02  0.9997704  1     0.9969697
##   3.274549e-02  0.9997704  1     0.9969697
##   3.764936e-02  0.9997704  1     0.9969697
##   4.328761e-02  0.9997704  1     0.9969697
##   4.977024e-02  0.9997704  1     0.9969697
##   5.722368e-02  0.9997704  1     0.9969697
##   6.579332e-02  0.9997704  1     0.9969697
##   7.564633e-02  0.9997704  1     0.9969697
##   8.697490e-02  0.9997704  1     0.9969697
##   1.000000e-01  0.9997704  1     0.9969697
##   1.149757e-01  0.9997704  1     0.9969697
##   1.321941e-01  0.9997704  1     0.9969697
##   1.519911e-01  0.9997704  1     0.9969697
##   1.747528e-01  0.9997704  1     0.9969697
##   2.009233e-01  0.9997704  1     0.9969697
##   2.310130e-01  0.9997704  1     0.9969697
##   2.656088e-01  0.9997704  1     0.9969697
##   3.053856e-01  0.9997704  1     0.9969697
##   3.511192e-01  0.9997704  1     0.9969697
##   4.037017e-01  0.9997704  1     0.9969697
##   4.641589e-01  0.9997704  1     0.9969697
##   5.336699e-01  0.9997704  1     0.9969697
##   6.135907e-01  0.9997245  1     0.9969697
##   7.054802e-01  0.9997245  1     0.9969697
##   8.111308e-01  0.9997245  1     0.9969697
##   9.326033e-01  0.9997245  1     0.9969697
##   1.072267e+00  0.9997245  1     0.9969697
##   1.232847e+00  0.9997245  1     0.9969697
##   1.417474e+00  0.9997245  1     0.9969697
##   1.629751e+00  0.9996786  1     0.9969697
##   1.873817e+00  0.9996786  1     0.9969697
##   2.154435e+00  0.9996786  1     0.9969697
##   2.477076e+00  0.9996786  1     0.9969697
##   2.848036e+00  0.9996786  1     0.9969697
##   3.274549e+00  0.9996786  1     0.9969697
##   3.764936e+00  0.9996786  1     0.9969697
##   4.328761e+00  0.9996786  1     0.9969697
##   4.977024e+00  0.9996786  1     0.9969697
##   5.722368e+00  0.9996786  1     0.9969697
##   6.579332e+00  0.9996327  1     0.9969697
##   7.564633e+00  0.9996327  1     0.9969697
##   8.697490e+00  0.9996327  1     0.9969697
##   1.000000e+01  0.9996327  1     0.9939394
##   1.149757e+01  0.9995868  1     0.9939394
##   1.321941e+01  0.9995868  1     0.9939394
##   1.519911e+01  0.9995868  1     0.9939394
##   1.747528e+01  0.9995868  1     0.9939394
##   2.009233e+01  0.9995868  1     0.9939394
##   2.310130e+01  0.9995409  1     0.9939394
##   2.656088e+01  0.9995409  1     0.9939394
##   3.053856e+01  0.9994949  1     0.9939394
##   3.511192e+01  0.9994490  1     0.9939394
##   4.037017e+01  0.9994031  1     0.9939394
##   4.641589e+01  0.9994031  1     0.9939394
##   5.336699e+01  0.9994031  1     0.9939394
##   6.135907e+01  0.9994031  1     0.9939394
##   7.054802e+01  0.9993572  1     0.9939394
##   8.111308e+01  0.9993113  1     0.9939394
##   9.326033e+01  0.9993113  1     0.9939394
##   1.072267e+02  0.9993113  1     0.9939394
##   1.232847e+02  0.9992654  1     0.9939394
##   1.417474e+02  0.9992654  1     0.9939394
##   1.629751e+02  0.9992654  1     0.9939394
##   1.873817e+02  0.9992195  1     0.9939394
##   2.154435e+02  0.9992195  1     0.9939394
##   2.477076e+02  0.9992195  1     0.9939394
##   2.848036e+02  0.9992195  1     0.9939394
##   3.274549e+02  0.9992195  1     0.9939394
##   3.764936e+02  0.9992195  1     0.9939394
##   4.328761e+02  0.9992195  1     0.9939394
##   4.977024e+02  0.9992195  1     0.9939394
##   5.722368e+02  0.9992195  1     0.9939394
##   6.579332e+02  0.9992195  1     0.9939394
##   7.564633e+02  0.9991736  1     0.9939394
##   8.697490e+02  0.9991736  1     0.9939394
##   1.000000e+03  0.9991736  1     0.9939394
## 
## Tuning parameter 'cp' was held constant at a value of bic
## ROC was used to select the optimal model using the largest value.
## The final values used for the model were lambda = 0.5336699 and cp = bic.

See model summary

summary(model_lr)
## 
## Call:
## stepPlr::plr(x = x, y = y, weights = if (!is.null(wts)) wts else rep(1, 
##     length(y)), lambda = param$lambda, cp = as.character(param$cp))
## 
## Coefficients:
##                   Estimate Std.Error z value Pr(>|z|)
## Intercept         -0.09200   1.12721  -0.082    0.935
## accX_entropy      -0.11725   0.44381  -0.264    0.792
## accY_entropy      -0.04349   0.43798  -0.099    0.921
## accZ_entropy       0.00404   0.43667   0.009    0.993
## angX_entropy       0.03266   0.44002   0.074    0.941
## angY_entropy      -0.12384   0.44708  -0.277    0.782
## angZ_entropy      -0.10108   0.44978  -0.225    0.822
## maxX_entropy      -0.16501   0.43932  -0.376    0.707
## magY_entropy       0.07913   0.42883   0.185    0.853
## magZ_entropy      -0.17710   0.41906  -0.423    0.672
## accX_mean_energy   0.12190   0.24957   0.488    0.626
## accY_mean_energy  -0.25998   0.20538  -1.266    0.206
## accZ_mean_energy  -0.07027   0.19497  -0.360    0.719
## angX_mean_energy   0.02043   0.19774   0.103    0.918
## angY_mean_energy   0.08907   0.20021   0.445    0.656
## angZ_mean_energy   0.10928   0.20016   0.546    0.585
## maxX_mean_energy  -0.05991   0.23363  -0.256    0.798
## magY_mean_energy  -0.08350   0.21934  -0.381    0.703
## magZ_mean_energy  -0.00388   0.19924  -0.019    0.985
## accX_total_energy  0.12190   0.24957   0.488    0.626
## accY_total_energy -0.25998   0.20538  -1.266    0.206
## accZ_total_energy -0.07027   0.19497  -0.360    0.719
## angX_total_energy  0.02043   0.19774   0.103    0.918
## angY_total_energy  0.08907   0.20021   0.445    0.656
## angZ_total_energy  0.10928   0.20016   0.546    0.585
## maxX_total_energy -0.05991   0.23363  -0.256    0.798
## magY_total_energy -0.08350   0.21934  -0.381    0.703
## magZ_total_energy -0.00388   0.19924  -0.019    0.985
## accX_rms           0.49662   0.15399   3.225    0.001
## accY_rms           0.62344   0.23456   2.658    0.008
## accZ_rms           0.02583   0.28285   0.091    0.927
## angX_rms           0.27318   0.30946   0.883    0.377
## angY_rms           0.09899   0.30271   0.327    0.744
## angZ_rms          -0.11642   0.23204  -0.502    0.616
## maxX_rms           0.11239   0.18800   0.598    0.550
## magY_rms           0.20096   0.20743   0.969    0.333
## magZ_rms          -0.12463   0.30833  -0.404    0.686
## accX_min          -0.49894   0.17180  -2.904    0.004
## accY_min           0.57132   0.15459   3.696    0.000
## accZ_min           0.17005   0.25146   0.676    0.499
## angX_min          -0.00815   0.26607  -0.031    0.975
## angY_min          -0.03856   0.25705  -0.150    0.881
## angZ_min          -0.21767   0.28976  -0.751    0.453
## maxX_min           0.20318   0.24046   0.845    0.398
## magY_min          -0.19462   0.26091  -0.746    0.456
## magZ_min           0.22182   0.26366   0.841    0.400
## accX_max          -0.45294   0.21313  -2.125    0.034
## accY_max           0.57416   0.18644   3.080    0.002
## accZ_max           0.18940   0.30528   0.620    0.535
## angX_max           0.04465   0.27656   0.161    0.872
## angY_max           0.06291   0.29874   0.211    0.833
## angZ_max          -0.04584   0.25622  -0.179    0.858
## maxX_max           0.14198   0.25006   0.568    0.570
## magY_max          -0.22135   0.21913  -1.010    0.312
## magZ_max           0.00544   0.27569   0.020    0.984
## accX_q1           -0.49815   0.16265  -3.063    0.002
## accY_q1            0.52055   0.14621   3.560    0.000
## accZ_q1            0.24068   0.25660   0.938    0.348
## angX_q1           -0.03456   0.24853  -0.139    0.889
## angY_q1            0.04526   0.26737   0.169    0.866
## angZ_q1           -0.17417   0.27081  -0.643    0.520
## maxX_q1            0.13227   0.21103   0.627    0.531
## magY_q1           -0.17412   0.22740  -0.766    0.444
## magZ_q1            0.20898   0.22679   0.921    0.357
## accX_q3           -0.49096   0.18829  -2.607    0.009
## accY_q3            0.50093   0.15978   3.135    0.002
## accZ_q3            0.12677   0.26700   0.475    0.635
## angX_q3            0.20071   0.26079   0.770    0.441
## angY_q3            0.07030   0.28660   0.245    0.806
## angZ_q3           -0.13289   0.23528  -0.565    0.572
## maxX_q3            0.06829   0.21479   0.318    0.750
## magY_q3           -0.19434   0.21225  -0.916    0.360
## magZ_q3            0.20746   0.24702   0.840    0.401
## accX_sd            0.16574   0.25831   0.642    0.521
## accY_sd           -0.14162   0.23257  -0.609    0.543
## accZ_sd           -0.02429   0.21463  -0.113    0.910
## angX_sd            0.08145   0.21009   0.388    0.698
## angY_sd            0.06433   0.21012   0.306    0.760
## angZ_sd            0.10504   0.22287   0.471    0.638
## maxX_sd           -0.04995   0.23683  -0.211    0.833
## magY_sd           -0.08599   0.23787  -0.361    0.718
## magZ_sd           -0.03666   0.22125  -0.166    0.868
## accX_skewness     -0.02446   0.42263  -0.058    0.954
## accY_skewness      0.17488   0.40503   0.432    0.666
## accZ_skewness     -0.14929   0.37866  -0.394    0.694
## angX_skewness     -0.06207   0.40250  -0.154    0.878
## angY_skewness      0.09911   0.41364   0.240    0.810
## angZ_skewness      0.07952   0.40868   0.195    0.845
## maxX_skewness      0.14981   0.41509   0.361    0.718
## magY_skewness     -0.25000   0.39730  -0.629    0.529
## magZ_skewness     -0.11068   0.40058  -0.276    0.783
## accX_kurtosis     -0.00410   0.42923  -0.010    0.992
## accY_kurtosis     -0.31021   0.38069  -0.815    0.415
## accZ_kurtosis     -0.08858   0.41348  -0.214    0.831
## angX_kurtosis      0.01813   0.42598   0.043    0.966
## angY_kurtosis     -0.02957   0.43143  -0.069    0.945
## angZ_kurtosis      0.04077   0.43109   0.095    0.924
## maxX_kurtosis     -0.06297   0.42891  -0.147    0.883
## magY_kurtosis     -0.11769   0.38999  -0.302    0.763
## magZ_kurtosis      0.03792   0.40532   0.094    0.925
## accX_median       -0.45984   0.17604  -2.612    0.009
## accY_median        0.52159   0.19941   2.616    0.009
## accZ_median        0.25837   0.26380   0.979    0.328
## angX_median        0.07602   0.28684   0.265    0.791
## angY_median       -0.07822   0.30382  -0.257    0.797
## angZ_median       -0.21698   0.26306  -0.825    0.409
## maxX_median        0.08267   0.23253   0.356    0.722
## magY_median       -0.17306   0.24549  -0.705    0.481
## magZ_median        0.22490   0.26446   0.850    0.395
## 
##     Null deviance: 917.73 on 661 degrees of freedom
## Residual deviance: 2.49 on 636.8 degrees of freedom
##             Score: deviance + 6.5 * df = 166.17
max((model_lr$results)$ROC)
## [1] 0.9997704

See tuning results

# Tuning curve: CV ROC as a function of lambda.
plot(model_lr)

# Distribution of the resampled ROC values across folds.
trellis.par.set(caretTheme())
densityplot(model_lr, pch = "|")

# Per-fold training accuracy plus the MLeval report for the LR model.
lr_train_results <- trainResults(model_lr, "Logistic Regression")
## ***MLeval: Machine Learning Model Evaluation***
## Input: caret train function object
## Averaging probs.
## Group 1 type: repeatedcv
## Observations: 662
## Number of groups: 1
## Observations per group: 662
## Positive: yes
## Negative: no
## Group: Logistic Regression
## Positive: 331
## Negative: 331
## ***Performance Metrics***

## Logistic Regression Optimal Informedness = 0.996978851963746
## Logistic Regression AUC-ROC = 1

lr_train_results
##     Resample  accuracy
## 1 Fold1.Rep1 0.9974242
## 2 Fold2.Rep1 1.0000000
## 3 Fold3.Rep1 1.0000000
## 4 Fold4.Rep1 1.0000000
## 5 Fold5.Rep1 0.9924242
# Mean of the per-fold accuracies computed by trainResults().
cat("5-fold train accuracy: ", mean(lr_train_results[,2]))
## 5-fold train accuracy:  0.9979697
library(ROCit)  # already attached above; repeated here for chunk independence
# Score the held-out test set and build an empirical ROC curve from the
# second probability column (the positive class).
prediction_lr <- predict(model_lr, test_df, type = "prob")
ROCit_lr <- rocit(score=prediction_lr[,2],class=test_df$Target)
plot(ROCit_lr, legend = TRUE, YIndex = FALSE, values = TRUE)

summary(ROCit_lr)
##                           
##  Method used: empirical   
##  Number of positive(s): 83
##  Number of negative(s): 83
##  Area under curve: 0.7609
# Threshold the positive-class probability at 0.5 to get hard 0/1 labels,
# matching the 0/1 coding kept in test_df$Target.
lr_binary <- ifelse(prediction_lr[,2]>0.5, 1, 0)
lr_error <- errorRate(lr_binary, test_df$Target)
cat("\nTest accuracy:   ", 1-lr_error,
    "\nTest error rate: ", lr_error,
    "\nTest AUC:        ", ROCit_lr$AUC)
## 
## Test accuracy:    0.6987952 
## Test error rate:  0.3012048 
## Test AUC:         0.7609232
# Full confusion matrix; class "1" (fall) is treated as the positive class.
confusionMatrix(table(lr_binary, test_df$Target), positive="1")
## Confusion Matrix and Statistics
## 
##          
## lr_binary  0  1
##         0 53 20
##         1 30 63
##                                           
##                Accuracy : 0.6988          
##                  95% CI : (0.6229, 0.7675)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : 1.605e-07       
##                                           
##                   Kappa : 0.3976          
##                                           
##  Mcnemar's Test P-Value : 0.2031          
##                                           
##             Sensitivity : 0.7590          
##             Specificity : 0.6386          
##          Pos Pred Value : 0.6774          
##          Neg Pred Value : 0.7260          
##              Prevalence : 0.5000          
##          Detection Rate : 0.3795          
##    Detection Prevalence : 0.5602          
##       Balanced Accuracy : 0.6988          
##                                           
##        'Positive' Class : 1               
## 

Build LDA Here

# 5-fold repeated cross-validation. classProbs and savePredictions are
# required downstream by trainResults() / MLeval; unlike the LR chunk,
# parallel resampling is enabled here.
k = 5
myControl_lda <- trainControl(
                             method = "repeatedcv", number = k,
                             summaryFunction = twoClassSummary,
                             classProbs = TRUE,
                             verboseIter = FALSE,
                             savePredictions = TRUE,
                             allowParallel = TRUE
                            )
# Sparse LDA grid: number of retained predictors (2..50) crossed with four
# ridge-penalty (lambda) values.
myGrid_lda <-  expand.grid(.NumVars = c(2:50), 
                           .lambda = c(0.01, 0.1, 1, 10))
# Fit on centered/scaled predictors; the pair is selected by CV ROC.
model_lda <- train(Target ~., 
                 data = train_df, 
                 method = "sparseLDA",
                 tuneGrid = myGrid_lda, 
                 metric = "ROC",
                 trControl = myControl_lda,
                 preProcess = c("center", "scale"),
                 verbose = FALSE)

model_lda
## Sparse Linear Discriminant Analysis 
## 
## 662 samples
## 108 predictors
##   2 classes: 'no', 'yes' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (5 fold, repeated 1 times) 
## Summary of sample sizes: 529, 529, 530, 530, 530 
## Resampling results across tuning parameters:
## 
##   NumVars  lambda  ROC        Sens  Spec     
##    2        0.01   0.9981730  1     0.9939846
##    2        0.10   0.9981730  1     0.9939846
##    2        1.00   0.9981730  1     0.9939846
##    2       10.00   0.9980367  1     0.9939846
##    3        0.01   0.9986740  1     0.9939846
##    3        0.10   0.9986740  1     0.9939846
##    3        1.00   0.9985828  1     0.9939846
##    3       10.00   0.9983999  1     0.9939846
##    4        0.01   0.9987172  1     0.9939846
##    4        0.10   0.9987624  1     0.9939846
##    4        1.00   0.9988076  1     0.9939846
##    4       10.00   0.9988528  1     0.9939846
##    5        0.01   0.9987387  1     0.9939846
##    5        0.10   0.9987387  1     0.9939846
##    5        1.00   0.9987151  1     0.9939846
##    5       10.00   0.9988069  1     0.9939846
##    6        0.01   0.9994949  1     0.9939846
##    6        0.10   0.9994949  1     0.9939846
##    6        1.00   0.9994949  1     0.9939846
##    6       10.00   0.9994490  1     0.9939846
##    7        0.01   0.9996786  1     0.9939846
##    7        0.10   0.9996786  1     0.9939846
##    7        1.00   0.9996786  1     0.9939846
##    7       10.00   0.9995868  1     0.9939846
##    8        0.01   0.9997245  1     0.9939846
##    8        0.10   0.9997704  1     0.9939846
##    8        1.00   0.9997704  1     0.9939846
##    8       10.00   0.9996327  1     0.9939846
##    9        0.01   0.9997245  1     0.9939846
##    9        0.10   0.9997245  1     0.9939846
##    9        1.00   0.9997704  1     0.9939846
##    9       10.00   0.9996786  1     0.9939846
##   10        0.01   0.9997245  1     0.9939846
##   10        0.10   0.9997245  1     0.9939846
##   10        1.00   0.9997704  1     0.9939846
##   10       10.00   0.9998163  1     0.9939846
##   11        0.01   0.9997245  1     0.9939846
##   11        0.10   0.9997245  1     0.9939846
##   11        1.00   0.9997704  1     0.9939846
##   11       10.00   0.9998163  1     0.9939846
##   12        0.01   0.9997245  1     0.9939846
##   12        0.10   0.9997245  1     0.9939846
##   12        1.00   0.9997704  1     0.9939846
##   12       10.00   0.9997704  1     0.9939846
##   13        0.01   0.9997245  1     0.9939846
##   13        0.10   0.9997245  1     0.9939846
##   13        1.00   0.9997245  1     0.9939846
##   13       10.00   0.9997704  1     0.9939846
##   14        0.01   0.9996327  1     0.9939846
##   14        0.10   0.9996327  1     0.9939846
##   14        1.00   0.9996786  1     0.9939846
##   14       10.00   0.9997704  1     0.9939846
##   15        0.01   0.9997016  1     0.9939846
##   15        0.10   0.9997016  1     0.9939846
##   15        1.00   0.9997245  1     0.9939846
##   15       10.00   0.9997704  1     0.9939846
##   16        0.01   0.9997245  1     0.9939846
##   16        0.10   0.9997245  1     0.9939846
##   16        1.00   0.9997016  1     0.9939846
##   16       10.00   0.9996786  1     0.9939846
##   17        0.01   0.9997245  1     0.9939846
##   17        0.10   0.9997245  1     0.9939846
##   17        1.00   0.9997245  1     0.9939846
##   17       10.00   0.9997245  1     0.9939846
##   18        0.01   0.9997475  1     0.9939846
##   18        0.10   0.9997475  1     0.9939846
##   18        1.00   0.9997475  1     0.9939846
##   18       10.00   0.9997245  1     0.9939846
##   19        0.01   0.9997704  1     0.9939846
##   19        0.10   0.9997475  1     0.9939846
##   19        1.00   0.9997245  1     0.9939846
##   19       10.00   0.9997475  1     0.9939846
##   20        0.01   0.9983701  1     0.9939846
##   20        0.10   0.9983701  1     0.9939846
##   20        1.00   0.9997934  1     0.9939846
##   20       10.00   0.9997475  1     0.9939846
##   21        0.01   0.9983701  1     0.9939846
##   21        0.10   0.9983701  1     0.9939846
##   21        1.00   0.9998163  1     0.9939846
##   21       10.00   0.9997704  1     0.9939846
##   22        0.01   0.9983701  1     0.9939846
##   22        0.10   0.9983701  1     0.9939846
##   22        1.00   0.9983701  1     0.9939846
##   22       10.00   0.9997934  1     0.9939846
##   23        0.01   0.9983701  1     0.9939846
##   23        0.10   0.9983701  1     0.9939846
##   23        1.00   0.9983930  1     0.9939846
##   23       10.00   0.9997934  1     0.9939846
##   24        0.01   0.9983930  1     0.9939846
##   24        0.10   0.9983930  1     0.9939846
##   24        1.00   0.9983930  1     0.9939846
##   24       10.00   0.9998163  1     0.9939846
##   25        0.01   0.9984389  1     0.9939846
##   25        0.10   0.9984389  1     0.9939846
##   25        1.00   0.9984389  1     0.9939846
##   25       10.00   0.9983701  1     0.9939846
##   26        0.01   0.9984389  1     0.9939846
##   26        0.10   0.9984389  1     0.9939846
##   26        1.00   0.9984389  1     0.9939846
##   26       10.00   0.9983930  1     0.9939846
##   27        0.01   0.9984389  1     0.9939846
##   27        0.10   0.9984389  1     0.9939846
##   27        1.00   0.9984389  1     0.9939846
##   27       10.00   0.9984160  1     0.9939846
##   28        0.01   0.9984389  1     0.9939846
##   28        0.10   0.9984389  1     0.9939846
##   28        1.00   0.9984389  1     0.9939846
##   28       10.00   0.9984389  1     0.9939846
##   29        0.01   0.9984389  1     0.9939846
##   29        0.10   0.9984389  1     0.9939846
##   29        1.00   0.9984389  1     0.9939846
##   29       10.00   0.9984389  1     0.9939846
##   30        0.01   0.9984389  1     0.9939846
##   30        0.10   0.9984389  1     0.9939846
##   30        1.00   0.9984619  1     0.9939846
##   30       10.00   0.9984389  1     0.9939846
##   31        0.01   0.9984619  1     0.9939846
##   31        0.10   0.9984619  1     0.9939846
##   31        1.00   0.9984619  1     0.9939846
##   31       10.00   0.9984389  1     0.9939846
##   32        0.01   0.9984619  1     0.9939846
##   32        0.10   0.9984619  1     0.9939846
##   32        1.00   0.9984619  1     0.9939846
##   32       10.00   0.9984389  1     0.9939846
##   33        0.01   0.9984619  1     0.9939846
##   33        0.10   0.9984619  1     0.9939846
##   33        1.00   0.9984619  1     0.9939846
##   33       10.00   0.9984389  1     0.9939846
##   34        0.01   0.9984619  1     0.9939846
##   34        0.10   0.9984619  1     0.9939846
##   34        1.00   0.9984619  1     0.9939846
##   34       10.00   0.9984389  1     0.9939846
##   35        0.01   0.9984619  1     0.9939846
##   35        0.10   0.9984619  1     0.9939846
##   35        1.00   0.9984619  1     0.9939846
##   35       10.00   0.9984389  1     0.9939846
##   36        0.01   0.9984619  1     0.9939846
##   36        0.10   0.9984619  1     0.9939846
##   36        1.00   0.9984619  1     0.9939846
##   36       10.00   0.9984389  1     0.9939846
##   37        0.01   0.9984619  1     0.9939846
##   37        0.10   0.9984619  1     0.9939846
##   37        1.00   0.9984619  1     0.9939846
##   37       10.00   0.9984389  1     0.9939846
##   38        0.01   0.9984619  1     0.9939846
##   38        0.10   0.9984619  1     0.9939846
##   38        1.00   0.9984619  1     0.9939846
##   38       10.00   0.9984389  1     0.9939846
##   39        0.01   0.9984619  1     0.9939846
##   39        0.10   0.9984619  1     0.9939846
##   39        1.00   0.9984619  1     0.9939846
##   39       10.00   0.9984619  1     0.9939846
##   40        0.01   0.9984619  1     0.9939846
##   40        0.10   0.9984619  1     0.9939846
##   40        1.00   0.9984619  1     0.9939846
##   40       10.00   0.9984619  1     0.9939846
##   41        0.01   0.9984619  1     0.9939846
##   41        0.10   0.9984619  1     0.9939846
##   41        1.00   0.9984619  1     0.9939846
##   41       10.00   0.9984619  1     0.9939846
##   42        0.01   0.9984619  1     0.9939846
##   42        0.10   0.9984619  1     0.9939846
##   42        1.00   0.9984619  1     0.9939846
##   42       10.00   0.9984619  1     0.9939846
##   43        0.01   0.9984619  1     0.9939846
##   43        0.10   0.9984619  1     0.9939846
##   43        1.00   0.9984619  1     0.9939846
##   43       10.00   0.9984619  1     0.9939846
##   44        0.01   0.9984619  1     0.9939846
##   44        0.10   0.9984619  1     0.9939846
##   44        1.00   0.9984619  1     0.9939846
##   44       10.00   0.9984619  1     0.9939846
##   45        0.01   0.9984619  1     0.9939846
##   45        0.10   0.9984619  1     0.9939846
##   45        1.00   0.9984619  1     0.9939846
##   45       10.00   0.9984619  1     0.9939846
##   46        0.01   0.9984619  1     0.9939846
##   46        0.10   0.9984619  1     0.9939846
##   46        1.00   0.9984619  1     0.9939846
##   46       10.00   0.9984619  1     0.9939846
##   47        0.01   0.9984619  1     0.9939846
##   47        0.10   0.9984619  1     0.9939846
##   47        1.00   0.9984619  1     0.9939846
##   47       10.00   0.9984619  1     0.9939846
##   48        0.01   0.9984619  1     0.9939846
##   48        0.10   0.9984619  1     0.9939846
##   48        1.00   0.9984619  1     0.9939846
##   48       10.00   0.9984619  1     0.9939846
##   49        0.01   0.9984619  1     0.9939846
##   49        0.10   0.9984619  1     0.9939846
##   49        1.00   0.9984619  1     0.9939846
##   49       10.00   0.9984619  1     0.9939846
##   50        0.01   0.9984619  1     0.9939846
##   50        0.10   0.9984619  1     0.9939846
##   50        1.00   0.9984619  1     0.9939846
##   50       10.00   0.9984619  1     0.9939846
## 
## ROC was used to select the optimal model using the largest value.
## The final values used for the model were NumVars = 10 and lambda = 10.

See model summary

summary(model_lda)
##             Length Class      Mode     
## call          6    -none-     call     
## beta         10    -none-     numeric  
## theta         2    -none-     numeric  
## varNames     10    -none-     character
## varIndex     10    -none-     numeric  
## origP         1    -none-     numeric  
## rss           1    -none-     numeric  
## fit           8    lda        list     
## classes       2    -none-     character
## lambda        1    -none-     numeric  
## stop          1    -none-     numeric  
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     2    data.frame list     
## obsLevels     2    -none-     character
## param         1    -none-     list
max((model_lda$results)$ROC)
## [1] 0.9998163

See tuning results

# Tuning profile: cross-validated ROC as a function of NumVars and lambda.
plot(model_lda)

# Density of the resampled ROC values, drawn under the caret lattice theme.
trellis.par.set(caretTheme())
densityplot(model_lda, pch = "|")

# Level-plot (heat-map) view of the same tuning grid; x labels rotated 90
# degrees so the parameter values stay readable.
trellis.par.set(caretTheme())
plot(model_lda, metric = "ROC", plotType = "level",
     scales = list(x = list(rot = 90)))

# Cross-validation performance of the tuned LDA model.
# trainResults() is a project helper defined earlier in this document —
# presumably it wraps MLeval on the saved fold predictions and returns a
# per-fold accuracy table; confirm against its definition.
lda_train_results <- trainResults(model_lda, "LDA")
## ***MLeval: Machine Learning Model Evaluation***
## Input: caret train function object
## Averaging probs.
## Group 1 type: repeatedcv
## Observations: 662
## Number of groups: 1
## Observations per group: 662
## Positive: yes
## Negative: no
## Group: LDA
## Positive: 331
## Negative: 331
## ***Performance Metrics***

## LDA Optimal Informedness = 0.996978851963746
## LDA AUC-ROC = 1

lda_train_results
##     Resample  accuracy
## 1 Fold1.Rep1 1.0000000
## 2 Fold2.Rep1 0.9924812
## 3 Fold3.Rep1 0.9924242
## 4 Fold4.Rep1 1.0000000
## 5 Fold5.Rep1 1.0000000
# Average the per-fold accuracies (column 2) into a single CV estimate.
cat("5-fold train accuracy: ", mean(lda_train_results[,2]))
## 5-fold train accuracy:  0.9969811
library(ROCit)
# Score the held-out test set; column 2 of the probability matrix is the
# second class level ("yes") as reported by the model printout above.
prediction_lda <- predict(model_lda, test_df, type = "prob")
# NOTE(review): test_df$Target appears coded 0/1 while training used
# "no"/"yes" — rocit() only needs two levels, but verify the alignment.
ROCit_lda <- rocit(score=prediction_lda[,2],class=test_df$Target)
plot(ROCit_lda, legend = TRUE, YIndex = FALSE, values = TRUE)

summary(ROCit_lda)
##                           
##  Method used: empirical   
##  Number of positive(s): 83
##  Number of negative(s): 83
##  Area under curve: 0.6711
# Hard class labels at the conventional 0.5 probability cut-off.
lda_binary <- ifelse(prediction_lda[,2]>0.5, 1, 0)
# errorRate() is a project helper defined earlier — presumably the
# misclassification rate; confirm against its definition.
lda_error <- errorRate(lda_binary, test_df$Target)
cat("\nTest accuracy:   ", 1-lda_error,
    "\nTest error rate: ", lda_error,
    "\nTest AUC:        ", ROCit_lda$AUC)
## 
## Test accuracy:    0.6686747 
## Test error rate:  0.3313253 
## Test AUC:         0.6711424
# Test-set confusion matrix for the LDA model, treating class "1" as the
# positive class.
confusionMatrix(table(lda_binary, test_df$Target), positive="1")
## Confusion Matrix and Statistics
## 
##           
## lda_binary  0  1
##          0 67 39
##          1 16 44
##                                           
##                Accuracy : 0.6687          
##                  95% CI : (0.5915, 0.7397)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : 8.263e-06       
##                                           
##                   Kappa : 0.3373          
##                                           
##  Mcnemar's Test P-Value : 0.003012        
##                                           
##             Sensitivity : 0.5301          
##             Specificity : 0.8072          
##          Pos Pred Value : 0.7333          
##          Neg Pred Value : 0.6321          
##              Prevalence : 0.5000          
##          Detection Rate : 0.2651          
##    Detection Prevalence : 0.3614          
##       Balanced Accuracy : 0.6687          
##                                           
##        'Positive' Class : 1               
## 

Build the SVM model (polynomial kernel, tuned with caret).

# Tune a polynomial-kernel SVM, selecting hyper-parameters by
# cross-validated ROC.
k <- 5

# 5-fold repeated CV; class probabilities and fold-level predictions are
# kept so twoClassSummary can compute ROC and MLeval can post-process.
myControl_svm <- trainControl(
  method = "repeatedcv",
  number = k,
  summaryFunction = twoClassSummary,
  classProbs = TRUE,
  verboseIter = FALSE,
  savePredictions = TRUE
)

# Full grid over cost, polynomial degree, and kernel scale.
myGrid_svm <- expand.grid(
  C = c(0.25, 0.5, 0.75),
  degree = c(2, 3, 4),
  scale = c(0.001, 0.01, 0.1)
)

# Predictors are centred and scaled inside each resample, which matters
# for kernel methods.
model_svm <- train(
  Target ~ .,
  data = train_df,
  method = "svmPoly",
  tuneGrid = myGrid_svm,
  metric = "ROC",
  trControl = myControl_svm,
  preProcess = c("center", "scale"),
  verbose = FALSE
)
model_svm
## Support Vector Machines with Polynomial Kernel 
## 
## 662 samples
## 108 predictors
##   2 classes: 'no', 'yes' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (5 fold, repeated 1 times) 
## Summary of sample sizes: 529, 530, 530, 529, 530 
## Resampling results across tuning parameters:
## 
##   C     degree  scale  ROC        Sens  Spec     
##   0.25  2       0.001  0.9991407  1     0.9939846
##   0.25  2       0.010  0.9994120  1     0.9970149
##   0.25  2       0.100  0.9989597  1     0.9970149
##   0.25  3       0.001  0.9990050  1     0.9939846
##   0.25  3       0.010  0.9994120  1     0.9970149
##   0.25  3       0.100  0.9996382  1     0.9970149
##   0.25  4       0.001  0.9990502  1     0.9939846
##   0.25  4       0.010  0.9992311  1     0.9970149
##   0.25  4       0.100  0.9997739  1     0.9970149
##   0.50  2       0.001  0.9989597  1     0.9909543
##   0.50  2       0.010  0.9996834  1     0.9970149
##   0.50  2       0.100  0.9989597  1     0.9970149
##   0.50  3       0.001  0.9991859  1     0.9909543
##   0.50  3       0.010  0.9994120  1     0.9970149
##   0.50  3       0.100  0.9996382  1     0.9970149
##   0.50  4       0.001  0.9989597  1     0.9939846
##   0.50  4       0.010  0.9992311  1     0.9970149
##   0.50  4       0.100  0.9997739  1     0.9970149
##   0.75  2       0.001  0.9991407  1     0.9909543
##   0.75  2       0.010  0.9996834  1     0.9970149
##   0.75  2       0.100  0.9989597  1     0.9970149
##   0.75  3       0.001  0.9989597  1     0.9939846
##   0.75  3       0.010  0.9994120  1     0.9970149
##   0.75  3       0.100  0.9996382  1     0.9970149
##   0.75  4       0.001  0.9990050  1     0.9939846
##   0.75  4       0.010  0.9992311  1     0.9970149
##   0.75  4       0.100  0.9997739  1     0.9970149
## 
## ROC was used to select the optimal model using the largest value.
## The final values used for the model were degree = 4, scale = 0.1 and C = 0.25.

Inspect the fitted SVM model and its best cross-validated ROC.

# kernlab stores the final fit as a single S4 ksvm object, so summary()
# reports only its length/class/mode.
summary(model_svm)
## Length  Class   Mode 
##      1   ksvm     S4
# Best cross-validated ROC across the SVM tuning grid.
max((model_svm$results)$ROC)
## [1] 0.9997739

See tuning results

# Tuning profile: cross-validated ROC over C, degree and scale.
plot(model_svm)

# Density of the resampled ROC values under the caret lattice theme.
trellis.par.set(caretTheme())
densityplot(model_svm, pch = "|")

# Cross-validation performance of the tuned SVM.
# trainResults() is a project helper defined earlier in this document —
# presumably it wraps MLeval on the saved fold predictions and returns a
# per-fold accuracy table; confirm against its definition.
svm_train_results <- trainResults(model_svm, "SVM")
## ***MLeval: Machine Learning Model Evaluation***
## Input: caret train function object
## Averaging probs.
## Group 1 type: repeatedcv
## Observations: 662
## Number of groups: 1
## Observations per group: 662
## Positive: yes
## Negative: no
## Group: SVM
## Positive: 331
## Negative: 331
## ***Performance Metrics***

## SVM Optimal Informedness = 0.996978851963746
## SVM AUC-ROC = 1

svm_train_results
##     Resample  accuracy
## 1 Fold1.Rep1 0.9924812
## 2 Fold2.Rep1 1.0000000
## 3 Fold3.Rep1 1.0000000
## 4 Fold4.Rep1 0.9991646
## 5 Fold5.Rep1 0.9974747
# Average the per-fold accuracies (column 2) into a single CV estimate.
cat("5-fold train accuracy: ", mean(svm_train_results[,2]))
## 5-fold train accuracy:  0.9978241
library(ROCit)
# Score the held-out test set; column 2 of the probability matrix is the
# second class level ("yes") as reported by the model printout above.
prediction_svm <- predict(model_svm, test_df, type = "prob")
ROCit_svm <- rocit(score=prediction_svm[,2],class=test_df$Target)
plot(ROCit_svm, legend = TRUE, YIndex = FALSE, values = TRUE)

summary(ROCit_svm)
##                           
##  Method used: empirical   
##  Number of positive(s): 83
##  Number of negative(s): 83
##  Area under curve: 0.6065
# Hard class labels at the conventional 0.5 probability cut-off.
svm_binary <- ifelse(prediction_svm[,2]>0.5, 1, 0)
# errorRate() is a project helper defined earlier — presumably the
# misclassification rate; confirm against its definition.
svm_error <- errorRate(svm_binary, test_df$Target)
cat("\nTest accuracy:   ", 1-svm_error,
    "\nTest error rate: ", svm_error,
    "\nTest AUC:        ", ROCit_svm$AUC)
## 
## Test accuracy:    0.5963855 
## Test error rate:  0.4036145 
## Test AUC:         0.6065467
# Test-set confusion matrix for the SVM, treating class "1" as the
# positive class.
confusionMatrix(table(svm_binary, test_df$Target), positive="1")
## Confusion Matrix and Statistics
## 
##           
## svm_binary  0  1
##          0 50 34
##          1 33 49
##                                           
##                Accuracy : 0.5964          
##                  95% CI : (0.5176, 0.6717)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : 0.007934        
##                                           
##                   Kappa : 0.1928          
##                                           
##  Mcnemar's Test P-Value : 1.000000        
##                                           
##             Sensitivity : 0.5904          
##             Specificity : 0.6024          
##          Pos Pred Value : 0.5976          
##          Neg Pred Value : 0.5952          
##              Prevalence : 0.5000          
##          Detection Rate : 0.2952          
##    Detection Prevalence : 0.4940          
##       Balanced Accuracy : 0.5964          
##                                           
##        'Positive' Class : 1               
## 

Build the random forest model, using a custom caret method so that both mtry and ntree can be tuned jointly.

# Custom caret model specification that exposes randomForest's `ntree` as a
# tuning parameter alongside `mtry` (the stock "rf" method only tunes mtry).
customRF <- list(type = "Classification",
                 library = "randomForest",
                 loop = NULL)

# The tunable parameters caret will grid-search over.
customRF$parameters <- data.frame(parameter = c("mtry", "ntree"),
                                  class = rep("numeric", 2),
                                  label = c("mtry", "ntree"))

# Default grid used when train() is called with tuneLength rather than an
# explicit tuneGrid. The original returned NULL here, which makes any
# tuneLength-based call fail; fall back to the conventional
# mtry = floor(sqrt(p)) with randomForest's default of 500 trees.
customRF$grid <- function(x, y, len = NULL, search = "grid") {
  data.frame(mtry = floor(sqrt(ncol(x))), ntree = 500)
}

# Fit one randomForest for a single (mtry, ntree) combination.
customRF$fit <- function(x, y, wts, param, lev, last, weights, classProbs) {
  randomForest(x, y,
               mtry = param$mtry,
               ntree = param$ntree)
}

# Predict hard class labels.
customRF$predict <- function(modelFit, newdata, preProc = NULL, submodels = NULL)
   predict(modelFit, newdata)

# Predict class probabilities (required for ROC-based model selection).
customRF$prob <- function(modelFit, newdata, preProc = NULL, submodels = NULL)
   predict(modelFit, newdata, type = "prob")

# Order candidate models from least to most complex (by mtry, column 1).
customRF$sort <- function(x) x[order(x[, 1]), ]
# Class levels are read off the fitted randomForest object itself.
customRF$levels <- function(x) x$classes
# Number of CV folds; use `<-` for assignment per R style convention.
k <- 5

library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:psych':
## 
##     outlier
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(mlbench)
library(e1071)

# Rule-of-thumb mtry (square root of the predictor count, ~10.4 for 108
# predictors) — kept for reference; the grid below searches 10..15.
mtry <- sqrt(ncol(train_df) - 1)

# Same 5-fold repeated-CV setup as the other models: ROC summary,
# class probabilities, and saved fold predictions.
myControl_rf <- trainControl(
  method = "repeatedcv",
  number = k,
  summaryFunction = twoClassSummary,
  classProbs = TRUE,
  verboseIter = FALSE,
  savePredictions = TRUE
)

# Joint grid over mtry and ntree, enabled by the customRF specification.
myGrid_rf <- expand.grid(.mtry = 10:15, .ntree = c(1000, 1500, 2000, 2500))

model_rf <- train(
  Target ~ .,
  data = train_df,
  method = customRF,
  tuneGrid = myGrid_rf,
  metric = "ROC",
  trControl = myControl_rf,
  preProcess = c("center", "scale")
)

model_rf
## 662 samples
## 108 predictors
##   2 classes: 'no', 'yes' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (5 fold, repeated 1 times) 
## Summary of sample sizes: 529, 530, 529, 530, 530 
## Resampling results across tuning parameters:
## 
##   mtry  ntree  ROC        Sens  Spec     
##   10    1000   0.9998852  1     0.9939394
##   10    1500   0.9998623  1     0.9939394
##   10    2000   0.9999082  1     0.9939394
##   10    2500   0.9999082  1     0.9939394
##   11    1000   0.9998623  1     0.9939394
##   11    1500   0.9999082  1     0.9939394
##   11    2000   0.9999082  1     0.9939394
##   11    2500   0.9999082  1     0.9939394
##   12    1000   0.9999082  1     0.9939394
##   12    1500   0.9998623  1     0.9939394
##   12    2000   0.9998623  1     0.9939394
##   12    2500   0.9999082  1     0.9939394
##   13    1000   0.9999082  1     0.9939394
##   13    1500   0.9998623  1     0.9939394
##   13    2000   0.9998623  1     0.9939394
##   13    2500   0.9998623  1     0.9939394
##   14    1000   0.9998623  1     0.9939394
##   14    1500   0.9999082  1     0.9939394
##   14    2000   0.9998623  1     0.9939394
##   14    2500   0.9998623  1     0.9939394
##   15    1000   0.9999082  1     0.9939394
##   15    1500   0.9998852  1     0.9939394
##   15    2000   0.9999082  1     0.9939394
##   15    2500   0.9998623  1     0.9939394
## 
## ROC was used to select the optimal model using the largest value.
## The final values used for the model were mtry = 10 and ntree = 2000.

Inspect the fitted random forest model's components and its best cross-validated ROC.

# Components of the final randomForest fit (ntree = 2000 trees over the
# 108 scaled predictors).
summary(model_rf)
##                 Length Class      Mode     
## call               5   -none-     call     
## type               1   -none-     character
## predicted        662   factor     numeric  
## err.rate        6000   -none-     numeric  
## confusion          6   -none-     numeric  
## votes           1324   matrix     numeric  
## oob.times        662   -none-     numeric  
## classes            2   -none-     character
## importance       108   -none-     numeric  
## importanceSD       0   -none-     NULL     
## localImportance    0   -none-     NULL     
## proximity          0   -none-     NULL     
## ntree              1   -none-     numeric  
## mtry               1   -none-     numeric  
## forest            14   -none-     list     
## y                662   factor     numeric  
## test               0   -none-     NULL     
## inbag              0   -none-     NULL     
## xNames           108   -none-     character
## problemType        1   -none-     character
## tuneValue          2   data.frame list     
## obsLevels          2   -none-     character
## param              0   -none-     list
# Best cross-validated ROC across the random forest tuning grid.
max((model_rf$results)$ROC)
## [1] 0.9999082

See tuning results

# Tuning profile: cross-validated ROC over mtry and ntree.
plot(model_rf)

# Level-plot (heat-map) view of the tuning grid; x labels rotated 90
# degrees for readability.
trellis.par.set(caretTheme())
plot(model_rf, metric = "ROC", plotType = "level",
     scales = list(x = list(rot = 90)))

# Cross-validation performance of the tuned random forest.
# trainResults() is a project helper defined earlier in this document —
# presumably it wraps MLeval on the saved fold predictions and returns a
# per-fold accuracy table; confirm against its definition.
rf_train_results <- trainResults(model_rf, "Random Forest")
## ***MLeval: Machine Learning Model Evaluation***
## Input: caret train function object
## Averaging probs.
## Group 1 type: repeatedcv
## Observations: 662
## Number of groups: 1
## Observations per group: 662
## Positive: yes
## Negative: no
## Group: Random Forest
## Positive: 331
## Negative: 331
## ***Performance Metrics***

## Random Forest Optimal Informedness = 0.996978851963746
## Random Forest AUC-ROC = 1

rf_train_results
##     Resample  accuracy
## 1 Fold1.Rep1 1.0000000
## 2 Fold2.Rep1 1.0000000
## 3 Fold3.Rep1 1.0000000
## 4 Fold4.Rep1 1.0000000
## 5 Fold5.Rep1 0.9848485
# Average the per-fold accuracies (column 2) into a single CV estimate.
cat("5-fold train accuracy: ", mean(rf_train_results[,2]))
## 5-fold train accuracy:  0.9969697
library(ROCit)
# Score the held-out test set; column 2 of the probability matrix is the
# second class level ("yes") as reported by the model printout above.
prediction_rf <- predict(model_rf, test_df, type = "prob")
ROCit_rf <- rocit(score=prediction_rf[,2],class=test_df$Target)
plot(ROCit_rf, legend = TRUE, YIndex = FALSE, values = TRUE)

summary(ROCit_rf)
##                           
##  Method used: empirical   
##  Number of positive(s): 83
##  Number of negative(s): 83
##  Area under curve: 0.6541
# Hard class labels at the conventional 0.5 probability cut-off.
rf_binary <- ifelse(prediction_rf[,2]>0.5, 1, 0)
# errorRate() is a project helper defined earlier — presumably the
# misclassification rate; confirm against its definition.
rf_error <- errorRate(rf_binary, test_df$Target)
cat("\nTest accuracy:   ", 1-rf_error,
    "\nTest error rate: ", rf_error,
    "\nTest AUC:        ", ROCit_rf$AUC)
## 
## Test accuracy:    0.6024096 
## Test error rate:  0.3975904 
## Test AUC:         0.6540862
# Test-set confusion matrix for the random forest, treating class "1" as
# the positive class.
confusionMatrix(table(rf_binary, test_df$Target), positive="1")
## Confusion Matrix and Statistics
## 
##          
## rf_binary  0  1
##         0 48 31
##         1 35 52
##                                           
##                Accuracy : 0.6024          
##                  95% CI : (0.5237, 0.6774)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : 0.005106        
##                                           
##                   Kappa : 0.2048          
##                                           
##  Mcnemar's Test P-Value : 0.711923        
##                                           
##             Sensitivity : 0.6265          
##             Specificity : 0.5783          
##          Pos Pred Value : 0.5977          
##          Neg Pred Value : 0.6076          
##              Prevalence : 0.5000          
##          Detection Rate : 0.3133          
##    Detection Prevalence : 0.5241          
##       Balanced Accuracy : 0.6024          
##                                           
##        'Positive' Class : 1               
## 

Compare the cross-validated performance of all four models (LR, LDA, SVM, RF).

# Collect the per-fold resampling results of all four fitted models so they
# can be compared on the same CV folds. model_lr is fitted earlier in the
# document (outside this excerpt).
resamps <- resamples(list(LR = model_lr,
                          LDA = model_lda,
                          SVM = model_svm,
                          RF = model_rf))
# Five-number summaries of fold-level ROC, sensitivity and specificity.
summary(resamps)
## 
## Call:
## summary.resamples(object = resamps)
## 
## Models: LR, LDA, SVM, RF 
## Number of resamples: 5 
## 
## ROC 
##          Min. 1st Qu. Median      Mean 3rd Qu. Max. NA's
## LR  0.9988522       1      1 0.9997704       1    1    0
## LDA 0.9990817       1      1 0.9998163       1    1    0
## SVM 0.9988693       1      1 0.9997739       1    1    0
## RF  0.9995409       1      1 0.9999082       1    1    0
## 
## Sens 
##     Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## LR     1       1      1    1       1    1    0
## LDA    1       1      1    1       1    1    0
## SVM    1       1      1    1       1    1    0
## RF     1       1      1    1       1    1    0
## 
## Spec 
##          Min.   1st Qu. Median      Mean 3rd Qu. Max. NA's
## LR  0.9848485 1.0000000      1 0.9969697       1    1    0
## LDA 0.9848485 0.9850746      1 0.9939846       1    1    0
## SVM 0.9850746 1.0000000      1 0.9970149       1    1    0
## RF  0.9696970 1.0000000      1 0.9939394       1    1    0
# Customise the lattice theme for the comparison plots: translucent grey
# points and a heavier red reference line. (Normalised the mixed `=`/`<-`
# assignments to `<-` per R style convention; behaviour is unchanged.)
theme1 <- trellis.par.get()
theme1$plot.symbol$col <- rgb(.2, .2, .2, .4)
theme1$plot.symbol$pch <- 16
theme1$plot.line$col <- rgb(1, 0, 0, .7)
theme1$plot.line$lwd <- 2
trellis.par.set(theme1)
# Box-and-whisker plots of ROC / Sens / Spec across the five folds.
bwplot(resamps, layout = c(3, 1))

trellis.par.set(caretTheme())
# Dot plot of mean cross-validated ROC per model.
dotplot(resamps, metric = "ROC")

trellis.par.set(theme1)
# Pairwise Bland–Altman-style comparison of fold-level metrics
# (per the `what` argument; see ?xyplot.resamples).
xyplot(resamps, what = "BlandAltman")

# Scatter-plot matrix of fold-level performance across the four models.
splom(resamps)